import re, requests, lxml.html


# Fetch the Baidu landing page and print the headline texts matched by two
# XPath queries, each followed by the number of matches.
etree = lxml.html.etree
url = 'https://www.baidu.com/?tn=80035161_1_dg'

# Browser-like request headers sent with the page request.
# NOTE(review): the Cookie value below appears to embed a logged-in session
# token (BDUSS). If it is a real credential it should be revoked and loaded
# from the environment rather than committed to source -- confirm with owner.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    'Cookie': 'BAIDUID=88248C797C2351B8AA0B52A5EB9242EA:FG=1; BIDUPSID=88248C797C2351B8AA0B52A5EB9242EA; PSTM=1545043899; BD_UPN=12314753; BDUSS=W41RzBSQVhPdjQwOVJnLS1KaXNpcWUzT215MG43cFI0VURzbVY2cUNoaTJTMWhjQVFBQUFBJCQAAAAAAAAAAAEAAADhUWJ12Lywos3Y2LwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAALa-MFy2vjBcd; delPer=0; BD_CK_SAM=1; PSINO=1; BDRCVFR[k2U9xfnuVt6]=mk3SLVN4HKm; BD_HOME=1; BDRCVFR[S4-dAuiWMmn]=7Gup0zg2JVtfj6snjndnHmkg17xuAT; H_PS_PSSID=28505_1467_21109_28584_28558_28414_22157; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; H_PS_645EC=0c3bdDn9gLH3SbauyBc1oEzS1ZNP6BsZ1dxCTCvoEpgbD74IbqAFd7H8QaiNRMM0dw',
}

# requests never times out by default; bound the wait so a stalled connection
# cannot hang the script indefinitely.
response = requests.get(url, headers=headers, timeout=10)
# Fail loudly on 4xx/5xx instead of parsing an error page and silently
# printing empty result lists.
response.raise_for_status()

# Decode as UTF-8 (as before) but substitute undecodable bytes rather than
# aborting the whole run on a single malformed byte.
html = etree.HTML(response.content.decode('utf8', errors='replace'))

# Text of every <a> nested under <span class="news-title">.
infos = html.xpath('//span[@class="news-title"]//a/text()')
# Text of the <h2>/<a> heading inside the "special" news item container; the
# class attribute must match this exact string.
news = html.xpath('//div[@class="s-news-special s-news-item s-news-special-item-tpl-3 s-opacity-blank8"]/h2/a/text()')
print(news, len(news))
print(infos, len(infos))